library(brms)
library(data.table)

# SET WORKING DIRECTORY
setwd("/mnt/c/Users/drmil/Dropbox/White House Lobbying/3YP/APSR Final/Replication Archive/visitor_logs_analyses")

obama_logs_analysis <- fread("obama_logs_analysis.csv", header = TRUE, stringsAsFactors = FALSE)

model_formula <- brmsformula(any_visits_notours_leq0 ~ any_visits_notours_leq0_1l + 
                               log(lobbyexp_1l+1) + made_donations_l1tol8 + 
                               made_donations_l1tol8:log(amt_donations_1ltol8+1) +
                               industry_pid + inhouse_lobby +
                               ACC + ADV + AER + AGR + ALC + ANI + APP + ART +
                               AUT + AVI + BAN + BEV + BNK + BUD + CAW + CDT + CHM + CIV + COM +
                               CON + CPI + CPT + CSP + DEF + DIS + DOC + ECN + EDU + ENG + ENV +
                               FAM + FIN + FIR + FOO + FOR + FUE + GAM + GOV + HCR + HOM + HOU +
                               IMM + IND + INS + INT + LAW + LBR + MAN + MAR + MED + MIA + MIN +
                               MMM + MON + NAT + PHA + POS + REL + RES + RET + ROD + RRR + SCI +
                               SMB + SPO + TAR + TAX + TEC + TOB + TOR + TOU + TRA + TRD + TRF +
                               TRU + UNM + URB + UTI + VET + WAS + WEL +
                               (1|q|yearperiod) + 
                               (1|p|crp_orgname:crp_industry) + 
                               (1|r|crp_industry))

# USE MODEL FORMULA TO CREATE DATA FRAME TO ASSIGN PRIORS
priors_df <-get_prior(model_formula,
                      data = obama_logs_analysis, 
                      family=bernoulli(link = "logit"))

# ASSIGN DIFFUSE PRIORS FOR BETA COEFFICIENTS, VARYING INTERCEPTS FOR EACH
# GROUPING, AND CORRELATION AMONG EACH GROUPING OF VARYING INTERCEPTS
priors_df$prior <- ifelse(priors_df$class=="b", "normal(0, 10)", priors_df$prior)
priors_df$prior <- ifelse(priors_df$class=="cor", "lkj(1)", priors_df$prior)
priors_df$prior <- ifelse(priors_df$class=="sd"|priors_df$class=="Intercept", 
                          "student_t(3, 0, 10)", priors_df$prior)

# FITTING MODEL--NOTE THAT MODEL USES BOTH WITHIN- AND ACROSS-CHAIN 
# PARALLELIZATION
tableSI7_col2 <- brm(model_formula,
                        data= obama_logs_analysis, 
                        family=bernoulli(link = "logit"), 
                        prior = priors_df,
                        refresh = 10, 
                        control=list(adapt_delta=0.99999999999),
                        threads = threading(threads = 3, static = TRUE), 
                        backend = "cmdstanr",
                        chains = 4, 
                        cores = 4, 
                        iter = 2000, 
                        seed = 1989)

summary(tableSI7_col2)

# model diagnostics
# any divergent transitions?
sum(nuts_params(tableSI7_col2, pars = "divergent__")$Value)
# any rhats above 1.1?
sort(rhat(tableSI7_col2), decreasing = TRUE)[1:10]
# neff ratio sufficiently large (>0.001 for all parameters?)
sort(neff_ratio(tableSI7_col2), decreasing = FALSE)[1:10]
save(tableSI7_col2, file="tableSI7_col2.RData")